package com.xiaojiezhu.spark.rdd.action

import org.apache.spark.{SparkConf, SparkContext}

/**
  * @author 朱小杰
  *         时间 2017-11-29 .22:23
  *         说明 ...
  */
/**
  * Demonstrates `RDD.aggregate`: computes the sum, count, and mean of a
  * small integer RDD in a single pass.
  *
  * `aggregate` takes a zero value plus two functions: a within-partition
  * combiner (seqOp) and a cross-partition merger (combOp). Here the
  * accumulator is a `(sum, count)` tuple.
  */
object ScalaAggregate {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("app")
    val sc = new SparkContext(conf)
    try {
      val rdd = sc.parallelize(List(1, 2, 3, 4))

      // (0, 0) is the zero value: (running sum, running count).
      val result = rdd.aggregate((0, 0))(
        (acc, value) => (acc._1 + value, acc._2 + 1),          // seqOp: fold one element into the partition-local accumulator
        (acc1, acc2) => (acc1._1 + acc2._1, acc1._2 + acc2._2) // combOp: merge accumulators from different partitions
      )
      // Use toDouble so the mean is not truncated by integer division
      // (e.g. sum 10 / count 4 must print 2.5, not 2).
      val mean = result._1.toDouble / result._2
      println(s"总值:${result._1} , 数量:${result._2} , 平均值:$mean")
    } finally {
      // Always release the SparkContext, even if the job above throws.
      sc.stop()
    }
  }
}
